library(tidyverse)
library(httr)
library(sf)
library(ggplot2)
library(tmap)
library(plotly)
library(glue)
Define common columns
cols <- c(
"Date",
"Regio",
"Period",
"Population",
"Deaths",
"MortalityRate")
Load historical data for flu dataset
cols.historic <-
c(
cols,
"Year"
)
data.historic <- read_rds("../results/data.Rds") %>% rename(Regio = RegioS) %>% select(cols.historic)
# data.flu2018 <- data.historic %>%
# filter(Year == 2018 | Year == 2017) %>%
# group_by(Year) %>%
# mutate(
# LastPeriod = max(Period, na.rm = T),
# Period = if_else(Year == 2017 & Period == LastPeriod, 0, Period),
# ) %>%
# ungroup() %>%
# mutate(Year = if_else(Period == 0, 2018, Year)) %>%
# filter(Year == 2018) %>%
# rename(Flu2018 = MortalityRate)
Additional RIVM data
data.rivm <- read_rds("../results/rivm.Rds") %>% rename(Rivm = Deaths)
Data for 2020 corona crisis
cols.corona <-
c(
cols,
"ExpectedMortality", "AvgMortality", "UnexpectedMortality", "ExcessiveMortality", "Flu2018"
)
data.corona <- read_rds("../results/data.corona.Rds") %>% rename(Regio = RegioS) %>% select(cols.corona) %>%
full_join(data.rivm, by = c("Regio", "Date", "Period")) %>%
drop_na(Date)
data.corona %>% filter(Regio == "NL") %>% arrange(Date)
Geographical entities
areas <- GET("https://opendata.arcgis.com/datasets/e1f0dd70abcb4fceabbc43412e43ad4b_0.geojson") %>% content(as = "text") %>%
read_sf() %>%
rename(RegioS = Gemeentecode) %>%
arrange(Gemeentenaam)
No encoding supplied: defaulting to UTF-8.
write_rds(areas, "../results/plots/areas.Rds")
Create a timeline and add splines for smooth charts
# Do some interpolation
dates <- data.corona %>% drop_na("Period", "Date") %>% distinct(Date) %>% pull("Date")
fields <- c("MortalityRate", "ExpectedMortality", "AvgMortality", "Flu2018", "DeathsUnexpected", "DeathsAboveAvg", "Rivm", "Deaths")
days <- data.frame(Date = seq(min(dates), to = max(dates), by = "days")) %>%
crossing(data.corona %>% select(Regio) %>% distinct()) %>%
crossing(fields) %>%
set_names(c("Date", "Regio", "Variable"))
data.mort <- data.corona %>%
mutate(
DeathsUnexpected = UnexpectedMortality * Population / 100000,
DeathsAboveAvg = ExcessiveMortality * Population / 100000,
) %>%
gather("Variable", "Value", fields) %>%
drop_na(Regio, Variable, Date) %>%
full_join(days) %>%
group_by(Regio, Variable) %>%
filter(sum(!is.na(Value)) > 0) %>%
# summarise(n = n()) %>% arrange(n)
mutate(
Interpolated = approx(Date, Value, xout=Date)$y,
Absolute = if_else(Variable == "MortalityRate", Value * Population / 100000, as.double(NA))
) %>%
ungroup() %>%
select("Regio", "Date", "Period", "Variable", "Value", "Interpolated", "Absolute")
Joining, by = c("Date", "Regio", "Variable")
data.mort %>% filter(Regio == "NL") %>% arrange(Date)
write_rds(data.mort, "../results/plots/mortovertime.Rds")
Example 1
x <- data.mort %>%
filter(Variable %in% c("MortalityRate", "ExpectedMortality", "AvgMortality", "Flu2018")) %>%
filter(Regio == "GM0345")
tooltip <- ("<b>{Date}</b>
Mortality rate: {floor(Value)}
Deaths: {floor(Absolute)}"
)
ggplotly(tooltip = "text", ggplot(
x,
aes(
group = Variable,
color = Variable,
text = tooltip %>% glue()
)
) +
geom_point(aes(x = Date, y = Value)) +
geom_line(aes(x = Date, y = Interpolated)) +
labs(x = "Period (weeks)" %>% glue(), y = "Deaths by 100.000 inhabitants") +
theme_minimal() +
theme(legend.title = element_blank())
)
Example 2
x <- data.mort %>%
filter(Variable %in% c("Rivm", "DeathsAboveAvg", "DeathsUnexpected")) %>%
filter(Regio == "NL")
tooltip <- ("<b>{Date}</b>
Deaths: {floor(Interpolated)}")
ggplotly(ggplot(
x,
aes(
group = Variable,
color = Variable,
text = tooltip %>% glue()
)
) +
geom_line(aes(x = Date, y = Interpolated)) +
geom_point(aes(x = Date, y = Value)) +
labs(x = "Period (weeks)" %>% glue(), y = "Absolute deaths") +
theme_minimal() +
theme(legend.title = element_blank()) +
scale_color_brewer(palette = "Set2"),
tooltip = "text"
)
Spatial dataset
data.spatial_absolute <- areas %>% left_join(data.corona, by = c("RegioS" = "Regio")) %>%
mutate(
ActualMortality = round(MortalityRate * Population / 100000),
ExpectedMortality = round(ExpectedMortality * Population / 100000),
AvgMortality = round(AvgMortality * Population / 100000),
UnexpectedMortality = round(UnexpectedMortality * Population / 100000),
ExcessiveMortality = round(ExcessiveMortality * Population / 100000),
)
write_rds(data.spatial_absolute, "../results/plots/spatial_absolute.Rds")
Example
tm_shape(data.spatial_absolute %>% filter(Period == 4)) +
tm_polygons(
col = "MortalityRate",
id = "Gemeentenaam",
title = "Mortality rate by municipality"
)

Summary table
year_high_mort <- data.historic %>% group_by(Regio, Year) %>%
summarise(MortalityRate = mean(MortalityRate)) %>%
arrange(desc(MortalityRate)) %>%
slice(1) %>% arrange(Regio) %>% ungroup() %>%
select(YearHighestMort = Year, Regio)
data.summary <- data.mort %>%
filter(Period > 0 & Variable %in% c("Deaths", "Rivm", "DeathsAboveAvg", "DeathsUnexpected")) %>%
group_by(Regio, Variable) %>%
summarise(Value = floor(sum(Value, na.rm = T))) %>%
spread(Variable, Value) %>%
ungroup() %>%
left_join(year_high_mort)
Joining, by = "Regio"
write_rds(data.summary, "../results/plots/summary.Rds")
data.mort %>%
filter(Regio == "GM0345" & Variable %in% c("Rivm")) %>% arrange(Date) %>%
group_by(Regio, Variable) %>% summarise(Value = sum(Value, na.rm = T)) %>% spread(Variable, Value)
data.summary %>% filter(Regio == "GM0345")
---
title: "Preprocess data for plotting in the dashboard"
output: html_notebook
---

```{r}
library(tidyverse)
library(httr)
library(sf)
library(ggplot2)
library(tmap)
library(plotly)
library(glue)
```

### Define common columns

```{r}
cols <- c(
  "Date",
  "Regio",
  "Period",
  "Population",
  "Deaths",
  "MortalityRate")
```

### Load historical data for flu dataset

```{r}
cols.historic <-
  c(
    cols,
    "Year"
  )

data.historic <- read_rds("../results/data.Rds") %>% rename(Regio = RegioS) %>% select(cols.historic)

# data.flu2018 <- data.historic %>% 
#   filter(Year == 2018 | Year == 2017) %>% 
#   group_by(Year) %>%
#   mutate(
#     LastPeriod = max(Period, na.rm = T),
#     Period = if_else(Year == 2017 & Period == LastPeriod, 0, Period),
#   ) %>%
#   ungroup() %>%
#   mutate(Year = if_else(Period == 0, 2018, Year)) %>%
#   filter(Year == 2018) %>%
#   rename(Flu2018 = MortalityRate)

```

### Additional RIVM data

```{r}
data.rivm <- read_rds("../results/rivm.Rds") %>% rename(Rivm = Deaths)
```

### Data for 2020 corona crisis

```{r}
cols.corona <-
  c(
    cols,
    "ExpectedMortality", "AvgMortality", "UnexpectedMortality", "ExcessiveMortality", "Flu2018"
  )

data.corona <- read_rds("../results/data.corona.Rds") %>% rename(Regio = RegioS) %>% select(cols.corona) %>%
  full_join(data.rivm, by = c("Regio", "Date", "Period")) %>%
  drop_na(Date)

data.corona %>% filter(Regio == "NL") %>% arrange(Date)
```

### Geographical entities

```{r}
areas <- GET("https://opendata.arcgis.com/datasets/e1f0dd70abcb4fceabbc43412e43ad4b_0.geojson") %>% content(as = "text") %>%
  read_sf() %>%
  rename(RegioS = Gemeentecode) %>% 
  arrange(Gemeentenaam) 

write_rds(areas, "../results/plots/areas.Rds")
```

### Create a timeline and add splines for smooth charts

```{r}
# Do some interpolation
dates <- data.corona %>% drop_na("Period", "Date") %>% distinct(Date) %>% pull("Date")
fields <- c("MortalityRate", "ExpectedMortality", "AvgMortality", "Flu2018", "DeathsUnexpected", "DeathsAboveAvg", "Rivm", "Deaths")
days <- data.frame(Date = seq(min(dates), to = max(dates), by = "days")) %>%
  crossing(data.corona %>% select(Regio) %>% distinct()) %>%
  crossing(fields) %>%
  set_names(c("Date", "Regio", "Variable"))

data.mort <- data.corona %>% 
  mutate(
    DeathsUnexpected = UnexpectedMortality * Population / 100000,
    DeathsAboveAvg = ExcessiveMortality * Population / 100000,
  ) %>%
  gather("Variable", "Value", fields) %>%
  drop_na(Regio, Variable, Date) %>%
  full_join(days) %>%
  group_by(Regio, Variable) %>%
  filter(sum(!is.na(Value)) > 0) %>%
  # summarise(n = n()) %>% arrange(n)
  mutate(
    Interpolated = approx(Date, Value, xout=Date)$y,
    Absolute = if_else(Variable == "MortalityRate", Value * Population / 100000, as.double(NA))
  ) %>%
  ungroup() %>%
  select("Regio", "Date", "Period", "Variable", "Value", "Interpolated", "Absolute")

data.mort %>% filter(Regio == "NL") %>% arrange(Date)

write_rds(data.mort, "../results/plots/mortovertime.Rds")
```

#### Example 1

```{r purl=F}
x <- data.mort %>%
  filter(Variable %in% c("MortalityRate", "ExpectedMortality", "AvgMortality", "Flu2018")) %>%
  filter(Regio == "GM0345")

tooltip <- ("<b>{Date}</b>
Mortality rate: {floor(Value)}
Deaths: {floor(Absolute)}"
)

ggplotly(tooltip = "text", ggplot(
      x,
      aes(
        group = Variable,
        color = Variable,
        text = tooltip %>% glue()
      )
    ) +
      geom_point(aes(x = Date, y = Value)) +
      geom_line(aes(x = Date, y = Interpolated)) +
      labs(x = "Period (weeks)" %>% glue(), y = "Deaths by 100.000 inhabitants") +
      theme_minimal() +
      theme(legend.title = element_blank())
)
```

#### Example 2

```{r purl=F}
x <- data.mort %>% 
  filter(Variable %in% c("Rivm", "DeathsAboveAvg", "DeathsUnexpected")) %>%
  filter(Regio == "NL")


tooltip <- ("<b>{Date}</b>
Deaths: {floor(Interpolated)}")

ggplotly(ggplot(
  x,
  aes(
    group = Variable,
    color = Variable,
    text =  tooltip %>% glue()
  )
) +
  geom_line(aes(x = Date, y = Interpolated)) +
  geom_point(aes(x = Date, y = Value)) +
  labs(x = "Period (weeks)" %>% glue(), y = "Absolute deaths") +
  theme_minimal() +
  theme(legend.title = element_blank()) +
  scale_color_brewer(palette = "Set2"),

  tooltip = "text"
)
```

### Spatial dataset

```{r}
data.spatial_absolute <- areas %>% left_join(data.corona, by = c("RegioS" = "Regio")) %>% 
    mutate(
      ActualMortality = round(MortalityRate * Population / 100000),
      ExpectedMortality = round(ExpectedMortality * Population / 100000),
      AvgMortality = round(AvgMortality * Population / 100000),
      UnexpectedMortality = round(UnexpectedMortality * Population / 100000),
      ExcessiveMortality = round(ExcessiveMortality * Population / 100000),
    ) 

write_rds(data.spatial_absolute, "../results/plots/spatial_absolute.Rds")
```

#### Example

```{r}
tm_shape(data.spatial_absolute %>% filter(Period == 4)) +
  tm_polygons(
    col = "MortalityRate",
    id = "Gemeentenaam",
    title = "Mortality rate by municipality"
  )
```
### Summary table

```{r}
year_high_mort <- data.historic %>% group_by(Regio, Year) %>% 
  summarise(MortalityRate = mean(MortalityRate)) %>% 
  arrange(desc(MortalityRate)) %>%
  slice(1) %>% arrange(Regio) %>% ungroup() %>%
  select(YearHighestMort = Year, Regio)

data.summary <- data.mort %>% 
      filter(Period > 0 & Variable %in% c("Deaths", "Rivm", "DeathsAboveAvg", "DeathsUnexpected")) %>%
      group_by(Regio, Variable) %>%
      summarise(Value = floor(sum(Value, na.rm = T))) %>%
      spread(Variable, Value) %>%
      ungroup() %>%
      left_join(year_high_mort)


write_rds(data.summary, "../results/plots/summary.Rds")

data.mort %>% 
  filter(Regio == "GM0345" & Variable %in% c("Rivm")) %>% arrange(Date) %>% 
  group_by(Regio, Variable) %>% summarise(Value = sum(Value, na.rm = T)) %>% spread(Variable, Value)
data.summary %>% filter(Regio == "GM0345")
```

